package org.wisdomdata.selenium.processor;


import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import org.openqa.selenium.SearchContext;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.springframework.beans.factory.annotation.Required;
import org.wisdomdata.common.AbstractProcessor;
import org.wisdomdata.framework.Processor;
import org.wisdomdata.selenium.SeleniumElementsExtractor;
import org.wisdomdata.selenium.SeleniumStringExtractor;
import org.wisdomdata.selenium.action.JumpActionByString;
import org.wisdomdata.selenium.strategy.RecrawlStrategy;


/**
 * Loops over a group of like elements located on the current page and
 * handles them one by one.
 *
 * <p>For every located element the configured string extractors are run with
 * that element as their search context, and the results are appended to this
 * processor's table. Afterwards, every URL collected in the
 * {@code NEXTURLS} column is visited through a {@link JumpActionByString}
 * and handed to the child processors.
 */
public class SeleniumElementsLoopProcessor extends SeleniumActionProcessor {
	private final static Logger logger =
			Logger.getLogger(SeleniumElementsLoopProcessor.class.getName());

	/** Name of the table column from which the follow-up URLs are read. */
	private static final String NEXT_URLS_COLUMN = "NEXTURLS";

	private RecrawlStrategy recrawlStrategy;

	/**
	 * The extractor that locates the elements to loop over.
	 */
	private SeleniumElementsExtractor elementsExtractor;

	/**
	 * Creates the processor and installs a {@link JumpActionByString} as its
	 * default action.
	 */
	public SeleniumElementsLoopProcessor() {
		super();
		this.setAction(new JumpActionByString());
	}

	public RecrawlStrategy getRecrawlStrategy() {
		return recrawlStrategy;
	}

	@Required
	public void setRecrawlStrategy(RecrawlStrategy recrawlStrategy) {
		this.recrawlStrategy = recrawlStrategy;
	}

	public SeleniumElementsExtractor getElementsExtractor() {
		return elementsExtractor;
	}

	public void setElementsExtractor(SeleniumElementsExtractor elementsExtractor) {
		this.elementsExtractor = elementsExtractor;
	}

	/**
	 * Extracts the elements of the current page, runs {@link #commonExtract}
	 * on each of them, then jumps to every URL stored in the
	 * {@code NEXTURLS} column and runs all child processors there.
	 *
	 * <p>When the recrawl strategy reports the last page, only the first
	 * {@code getLastPageNeedCrawlItemsNumber()} elements are handled (unless
	 * that number exceeds the elements actually found, in which case a warning
	 * is logged and all found elements are handled); the last-page flag is
	 * then reset.
	 */
	public void innerProcess() {
		// Locate the elements this level has to handle.
		this.elementsExtractor.extract();
		List<WebElement> elements = this.elementsExtractor.getExtractResults();

		// Treat a missing result list like an empty one instead of failing.
		int length = (elements == null) ? 0 : elements.size();
		if (this.getRecrawlStrategy().isLastPage()) {
			int lastPageNumber = this.getRecrawlStrategy().getLastPageNeedCrawlItemsNumber();
			if (lastPageNumber > length) {
				logger.warning("SeleniumElementsLoopProcessor： last page number is bigger than"
						+ " the real last page number, maybe action is error!");
			} else {
				length = lastPageNumber;
			}
			this.getRecrawlStrategy().setLastPage(false);
		}

		// To descend one level per element, each element must contain a hyperlink.
		logger.info("SeleniumElementsLoopProcessor: get " + length + " elements..");
		for (int i = 0; i < length; i++) {
			this.commonExtract(elements.get(i));
		}
		logger.info("SeleniumElementsLoopProcessor:" + length + " elements already success extract!");

		// The table only exists if commonExtract created it (extractors configured
		// and at least one element handled), so it must be null-checked here.
		Map<String, List<String>> table = this.getTaskCommon().tables.get(this.getTableName());
		List<String> jumpUrls = (table == null) ? null : table.get(NEXT_URLS_COLUMN);
		if (this.getChildProcessors() == null || this.getChildProcessors().size() == 0
				|| jumpUrls == null || jumpUrls.size() == 0) {
			return;
		}

		for (String uri : jumpUrls) {
			logger.info("SeleniumElementsLoopProcessor:" + uri + " begin deal...");
			// Jump to the page first, let every child processor finish, then continue.
			JumpActionByString jump = (JumpActionByString) getAction();
			jump.setUri(uri);
			jump.setSearchContext(this.getSearchContext());
			this.doAction();
			for (Processor processor : this.getChildProcessors()) {
				processor.process();
			}
			logger.info("SeleniumElementsLoopProcessor:" + uri + " deal done.");
		}
	}

	/**
	 * Runs every configured string extractor against {@code e} and appends the
	 * results to this processor's table, creating the table and its columns
	 * on demand. Does nothing when no extractors are configured.
	 *
	 * @param e the element used as search context for the extractors
	 */
	protected void commonExtract(WebElement e) {
		if (this.getExtractors() == null) {
			return;
		}

		// Logically there is one table per page; fetch it, creating it if absent.
		Map<String, List<String>> table = this.getTaskCommon().tables.get(this.getTableName());
		if (table == null) {
			table = new LinkedHashMap<String, List<String>>();
			this.getTaskCommon().tables.put(this.getTableName(), table);
		}

		for (SeleniumStringExtractor slse : this.getExtractors()) {
			SearchContext old = slse.getSearchContext();
			slse.setSearchContext(e);
			try {
				slse.extract();
				List<String> column = getOrCreateColumn(table, slse.getExtractName());
				String value = slse.getExtractResult();
				if (value == null) {
					// NOTE(review): nothing is appended for a missing value, so this
					// column ends up shorter than the others — confirm this is intended.
					logger.warning("property " + slse.getExtractName() + " can not find!");
				} else {
					column.add(value);
				}
			} finally {
				// Always hand the extractor back with its original search context,
				// even if extraction failed.
				slse.setSearchContext(old);
			}
		}

		// Optionally record the page the values came from as an extra column.
		if (this.isNeedStoreTargetUri()) {
			List<String> targetUri = getOrCreateColumn(table, AbstractProcessor.TARGET_URL);
			// NOTE(review): assumes this processor's search context is a WebDriver — confirm.
			targetUri.add(((WebDriver) this.getSearchContext()).getCurrentUrl());
		}
	}

	/** Returns the named column of {@code table}, adding an empty one if it is absent. */
	private static List<String> getOrCreateColumn(Map<String, List<String>> table, String name) {
		List<String> column = table.get(name);
		if (column == null) {
			column = new ArrayList<String>();
			table.put(name, column);
		}
		return column;
	}
}
